Code
library(sf)AFROMPA’R Bouaké 2025
On se propoe de passer en revue une partie du programme de l’été à travers un exercice pédagogique associant statistique, cartographie, analyse spatiale, modélisation … Cet exercice correspond typiquement au travail qu’on pourrait donner à des étudiants de licence 2 ou 3 ayant suivi des cours de statistique, cartographie et SIG.
On propose de réaliser une analyse de la distribution de l’Indicateur de Développement humain des régions de Côte d’Ivoire entre 2002 et 2022 à l’aide des données accessibles sur le site Global Data Lab . Le programme est rédigé de telle sorte qu’on puisse facilement refaire l’exercice en prenant d’autres dates (ex. Evolution entre 2012 et 2022) ou d’autres pays d’Afrique de l’Ouest.
library(sf)| gdlcode | Region | pop2002 | pop2022 | hdi2002 | hdi2022 | |
|---|---|---|---|---|---|---|
| 20 | CIVr101 | Abidjan | 3757 | 5984 | 0.581 | 0.569 |
| 21 | CIVr102 | Yamoussoukro | 154 | 245 | 0.636 | 0.619 |
| 22 | CIVr103 | Bas Sassandra | 729 | 1160 | 0.429 | 0.520 |
| 23 | CIVr104 | Comoe | 1031 | 1642 | 0.415 | 0.582 |
| 24 | CIVr105 | Denguele | 525 | 836 | 0.353 | 0.440 |
| 25 | CIVr106 | Goh-Djiboua | 944 | 1504 | 0.448 | 0.551 |
| 26 | CIVr107 | Lacs | 1513 | 2409 | 0.418 | 0.542 |
| 27 | CIVr108 | Lagunes | 1410 | 2245 | 0.479 | 0.566 |
| 28 | CIVr109 | Montagnes | 1880 | 2994 | 0.400 | 0.492 |
| 29 | CIVr110 | Sassandra-Marahoue | 2224 | 3541 | 0.427 | 0.512 |
| 30 | CIVr111 | Savanes | 864 | 1376 | 0.423 | 0.471 |
| 31 | CIVr112 | Vallee du Bandama | 716 | 1140 | 0.465 | 0.554 |
| 32 | CIVr113 | Woroba | 791 | 1259 | 0.243 | 0.423 |
| 33 | CIVr114 | Zanzan | 1149 | 1829 | 0.348 | 0.491 |
# sélectionne les variables
sel <- don[,c("hdi2002","hdi2022")]
# Tableau standard
quant<-apply(sel,2,quantile)
moy<-apply(sel,2,mean)
ect<-apply(sel,2,sd)
cv<-100*ect/moy
tab<-rbind(quant,moy,ect,cv)
row.names(tab) <-c("Minimum","Q1","Médiane","Q3","Maximum","Moyenne","Ecart-type", "C.V. (%)")
kable(tab, caption="Paramètres principaux", digits =2,
col.names = c("Situation en 2002","Situation en 2022"),
)| Situation en 2002 | Situation en 2022 | |
|---|---|---|
| Minimum | 0.24 | 0.42 |
| Q1 | 0.40 | 0.49 |
| Médiane | 0.42 | 0.53 |
| Q3 | 0.46 | 0.56 |
| Maximum | 0.64 | 0.62 |
| Moyenne | 0.43 | 0.52 |
| Ecart-type | 0.10 | 0.06 |
| C.V. (%) | 21.99 | 10.65 |
par(mfrow=c(2,1))
mintot <-min(c(sel$hdi2002, sel$hdi2022))
maxtot <-max(c(sel$hdi2002, sel$hdi2022))
# Histogramme
hist(sel$hdi2002,
breaks=quantile(sel$hdi2002),
xlim=c(mintot,maxtot),
col="gray80",
main= "Situation en 2002",
xlab = "IDH régional",
ylab = "Fréquence moyenne")
rug(sel$hdi2002, col="black", lwd=2)
lines(density(sel$hdi2002), lty=3,lwd=2)
hist(sel$hdi2022,
breaks=quantile(sel$hdi2022),
xlim=c(mintot,maxtot),
col="gray80",
main= "Situation en 2022",
xlab = "IDH régional",
ylab = "Fréquence moyenne")
rug(sel$hdi2022, col="black", lwd=2)
lines(density(sel$hdi2022), lty=3,lwd=2)# ne conserve que le code et le nom
map <- map[,c("gdlcode","Region","geometry")]
# Affichage du fonds de carte
par(mar=c(0,0,3,0))
plot(map$geometry,
col="gray90",
main = "Code des unités spatiales de la zone d'étude")
# Ajout du code des unités spatiales
coo<-st_coordinates(st_centroid(map))
text(coo, map$gdlcode, cex=0.5,col="black",)library(mapsf)
map<-map[,c("gdlcode","geometry")]
map_don <- merge(map, don, by="gdlcode")
maxequ<-max(don$pop012,don$pop2022)
par(mfrow=c(1,2))
mf_map(map_don$geometry, col="white")
mf_map(map_don, type="prop", var="pop2002",
val_max = maxequ, inches=0.1, col="gray20",
leg_title = "Population",)
mf_layout(title="2012",frame = T, credits = "Source : INS Tunisie")
mf_map(map_don$geometry, col="white")
mf_map(map_don, type="prop", var="pop2022",
val_max = maxequ, inches=0.1, col="gray20",
leg_title = "Population")
mf_layout(title="2022",frame = T, credits = "Source : INS Tunisie")library(mapsf)
map_don <- merge(map, don, by="gdlcode")
maxequ<-max(don$hdi2002,don$hdi2022)
par(mfrow=c(1,2))
mf_map(map_don, type="choro", var="hdi2002",
breaks = "quantile",nbreaks = 5, pal ="Grays",
leg_title = "IDH",leg_val_rnd = 2)
mf_layout(title="2002",frame = T, credits = "Source : Global Data Lab")
mf_map(map_don, type="choro", var="hdi2022",
breaks = "quantile",nbreaks = 5, pal ="Grays",
leg_title = "IDH",leg_val_rnd = 2)
mf_layout(title="2022",frame = T, credits = "Source : Global Data Lab")# prépration de l'analyse
gdlcode<-don$gdlcode
nom<-don$Region
X<-don$hdi2002
Y<-don$hdi2022
tab<-data.frame(gdlcode,nom,X,Y)
# Diagramme
plot(tab$X,tab$Y,
pch=20,
cex=0.8,
col="red",
main = "Evolution de l'IDH",
xlab="IDH 2002",
ylab ="IDH 2022")
text(tab$X,tab$Y,tab$nom,
pos=2,
cex=0.5,
col="blue")cor.test(X,Y, method="pearson")
Pearson's product-moment correlation
data: X and Y
t = 5.4499, df = 12, p-value = 0.0001477
alternative hypothesis: true correlation is not equal to 0
95 percent confidence interval:
0.5674649 0.9494016
sample estimates:
cor
0.843945
cor.test(X,Y, method="spearman")
Spearman's rank correlation rho
data: X and Y
S = 102, p-value = 0.001742
alternative hypothesis: true rho is not equal to 0
sample estimates:
rho
0.7758242
modreg <- lm(Y~X)
summary(modreg)
Call:
lm(formula = Y ~ X)
Residuals:
Min 1Q Median 3Q Max
-0.047666 -0.013634 -0.003286 0.018386 0.067288
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 0.30960 0.04016 7.709 5.48e-06 ***
X 0.49425 0.09069 5.450 0.000148 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 0.03115 on 12 degrees of freedom
Multiple R-squared: 0.7122, Adjusted R-squared: 0.6883
F-statistic: 29.7 on 1 and 12 DF, p-value: 0.0001477
# Diagramme
plot(tab$X,tab$Y,
pch=20,
cex=0.8,
col="red",
main = "Evolution de l'IDH",
xlab="IDH 2002",
ylab ="IDH 2022")
text(tab$X,tab$Y,tab$nom,
pos=2,
cex=0.5,
col="blue")
abline(modreg,col="black",lwd=1)tab$Y_est <- modreg$fitted.values
tab$Y_res <- modreg$residuals
tab<-tab[order(tab$Y_res),]
kable(tab, digits=3)| gdlcode | nom | X | Y | Y_est | Y_res | |
|---|---|---|---|---|---|---|
| 11 | CIVr111 | Savanes | 0.423 | 0.471 | 0.519 | -0.048 |
| 5 | CIVr105 | Denguele | 0.353 | 0.440 | 0.484 | -0.044 |
| 1 | CIVr101 | Abidjan | 0.581 | 0.569 | 0.597 | -0.028 |
| 9 | CIVr109 | Montagnes | 0.400 | 0.492 | 0.507 | -0.015 |
| 10 | CIVr110 | Sassandra-Marahoue | 0.427 | 0.512 | 0.521 | -0.009 |
| 13 | CIVr113 | Woroba | 0.243 | 0.423 | 0.430 | -0.007 |
| 2 | CIVr102 | Yamoussoukro | 0.636 | 0.619 | 0.624 | -0.005 |
| 3 | CIVr103 | Bas Sassandra | 0.429 | 0.520 | 0.522 | -0.002 |
| 14 | CIVr114 | Zanzan | 0.348 | 0.491 | 0.482 | 0.009 |
| 12 | CIVr112 | Vallee du Bandama | 0.465 | 0.554 | 0.539 | 0.015 |
| 8 | CIVr108 | Lagunes | 0.479 | 0.566 | 0.546 | 0.020 |
| 6 | CIVr106 | Goh-Djiboua | 0.448 | 0.551 | 0.531 | 0.020 |
| 7 | CIVr107 | Lacs | 0.418 | 0.542 | 0.516 | 0.026 |
| 4 | CIVr104 | Comoe | 0.415 | 0.582 | 0.515 | 0.067 |
library(mapsf)
# Standardisation des résidus
tab$Y_res_std<-tab$Y_res/sd(tab$Y_res)
# Jointure avec la carte
map<-map[,c("gdlcode","geometry")]
map_reg <- merge(map, tab, by="gdlcode")
# Choix de la palette et des classes
library(RColorBrewer)
mypal<-brewer.pal(n = 6, name = "RdYlBu")
mybreaks = c(-10, -2,-1,0,1,2,10)
mf_map(map_reg, type="choro", var="Y_res_std",
pal = mypal, breaks=mybreaks,
leg_title = "Résidus standardisés",leg_val_rnd = 1)
mf_layout(title="Ecarts à la tendance 2002-2022",frame = T, credits = "Source : Global Data Lab")map_don$cap<-0
map_don$cap[1]<-1
cap<-map_don[map_don$cap==1,]
map_don$dist<-1+as.numeric(st_distance(st_centroid(map_don),st_centroid(cap)))/1000
# Choix de la palette et des classes
mypal<-brewer.pal(n = 7, name = "Greys")
mybreaks = c(0,100,200,300,400,500, 1000)
mf_map(map_don, type="choro", var="dist",
pal = mypal, breaks=mybreaks,
leg_title = "en km",leg_val_rnd = 0)
mf_layout(title="Distance à vol d'oiseau au chef-lieu",frame = T, credits = "Source : INS Tunisie")X<-map_don$dist
Y<-map_don$hdi2022
par(mfrow=c(2,2))
plot(X,Y,main="Modèle arithmétique Y = a.X+b", sub = round(cor(X,Y),3))
plot(Y, log(X), main = "Modèle logarithmique Y = a.log(X)+b",sub = round(cor(log(X),Y),3))
plot(log(Y),X, main = "Modèle exponentiel log(Y) = a.X+b",sub = round(cor(X,log(Y)),3))
plot(log(X),log(Y), main = "Modèle puissance log(Y) = a.log(X)+b",sub = round(cor(log(X),log(Y)),3))